source("../GAA-EVAL.R")
## Warning: package 'ggplot2' was built under R version 3.4.4
## Warning: package 'dplyr' was built under R version 3.4.4
## Warning: package 'pROC' was built under R version 3.4.4
## Warning: package 'reshape2' was built under R version 3.4.3
## Warning: package 'ggrepel' was built under R version 3.4.4
## Warning: package 'gmodels' was built under R version 3.4.4
## Warning: package 'stringr' was built under R version 3.4.4
exp.data <- read.RealData(file = "exp_data.csv", sep = "\t",
col.id = 1, col.value = 2, col.sd = 3,na.character = 'NA')
length(exp.data$value)
exp.data.s <- read.RealData(file = "exp_data_s.csv", sep = ",",
col.id = 1, col.value = 2, col.sd = 3,na.character = 'NA')
head(exp.data.s$value)
sub.data <- read.Submission.Folder(folder.name = "prediction/",col.id = 1,
col.value = 2, col.sd = 3, real.data = exp.data)
sub.data.s = read.Submission.Folder(folder.name = "prediction/",col.id = 1,
col.value = 2, col.sd = 3, real.data = exp.data.s)
sub.data = addGroup(sub.data,c(1,1,2,3,3,4,4,5,6,6,6,6,7,8,9,9))
sub.data.s = addGroup(sub.data.s,c(1,1,2,3,3,4,4,5,6,6,6,6,7,8,9,9))
plot_all_scatter(real.data = exp.data, pred.data = sub.data, z.transform = TRUE)
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 4894 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 4894 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 4894 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 4484 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
without bootstrap method = “pearson”: In this example, we use pearson correlation sd.use = 0.3: experimental value with sd larger than 0.3 is filtered out z.transformation does not have effect to pearson correlation
# 1. Render coefficient value
result.cor.pearson <- eval.Correlation(real.data = exp.data.s, pred.data = sub.data.s,
method = "pearson", sd.use = 0.3,z.transform = TRUE)
head(result.cor.pearson)
## pearson.coefficient.n=205.sd<0.3 p.value
## Group_39-prediction_file-1 0.3572199 1.457490e-07
## Group_39-prediction_file-2 0.3540692 1.906731e-07
## Group_40-prediction_file-1 0.2995061 1.285132e-05
## Group_41-prediction_file-1 0.3794999 2.003074e-08
## Group_41-prediction_file-2 0.3794999 2.003074e-08
## Group_42-prediction_file-1 0.1345214 5.447680e-02
# 2. Plot Correlation
plot.Correlation(result.cor.pearson, "Pearson")
Bootstrap using row replacement (rep.time = 500 as a fixed input for now) boot = T, boot.var = F
# 1. Render coefficient value
boot.result.cor.pearson <- eval.Correlation(real.data = exp.data.s, pred.data = sub.data.s,
method = "pearson", sd.use = 0.3,z.transform = TRUE,boot = T)
# For Correlation-based Evaluation, provide mean, CI, and median pval
head(boot.result.cor.pearson)
## avg low_ci high_ci sd
## Group_39-prediction_file-1 0.3584616 0.248757113 0.4650019 0.06693278
## Group_39-prediction_file-2 0.3551402 0.247152353 0.4670083 0.06645212
## Group_40-prediction_file-1 0.3035865 0.203474870 0.4011387 0.06101202
## Group_41-prediction_file-1 0.3814340 0.299085806 0.4588648 0.04934251
## Group_41-prediction_file-2 0.3814340 0.299085806 0.4588648 0.04934251
## Group_42-prediction_file-1 0.1300704 0.008814297 0.2653382 0.07935949
## p.value
## Group_39-prediction_file-1 1.308500e-07
## Group_39-prediction_file-2 1.719168e-07
## Group_40-prediction_file-1 1.016501e-05
## Group_41-prediction_file-1 1.373425e-08
## Group_41-prediction_file-2 1.373425e-08
## Group_42-prediction_file-1 7.654748e-02
# 2. Plot Correlation
plot.Correlation(boot.result.cor.pearson, "Pearson",boot = TRUE)
Bootstrap using distribution modelling (rep.time = 500 as a fixed input for now) boot = T, boot.var = T
bootvar.result.cor.pearson <- eval.Correlation(real.data = exp.data.s, pred.data = sub.data.s,
method = "pearson", sd.use = 0.3,z.transform = TRUE,boot = T,boot.var = T )
head(bootvar.result.cor.pearson)
## avg low_ci high_ci sd
## Group_39-prediction_file-1 0.3547601 0.3412656 0.3692032 0.008070335
## Group_39-prediction_file-2 0.3515426 0.3375549 0.3655065 0.008193730
## Group_40-prediction_file-1 0.2973147 0.2833942 0.3098228 0.007979621
## Group_41-prediction_file-1 0.3772570 0.3641273 0.3895295 0.007732230
## Group_41-prediction_file-2 0.3772570 0.3641273 0.3895295 0.007732230
## Group_42-prediction_file-1 0.1335184 0.1191248 0.1464837 0.008738655
## p.value
## Group_39-prediction_file-1 1.774906e-07
## Group_39-prediction_file-2 2.367728e-07
## Group_40-prediction_file-1 1.471955e-05
## Group_41-prediction_file-1 2.475738e-08
## Group_41-prediction_file-2 2.475738e-08
## Group_42-prediction_file-1 5.595699e-02
plot.Correlation(bootvar.result.cor.pearson, "Pearson",boot = TRUE)
- (for demonstration, only present the bootstrapped result)
- variance.normalization: defined on PPT
- density.distance: RMSD based on distribution model of experimental value
# without variance.normalization + without density.distance
boot.result.rmsd4 <- eval.RMSD(real.data = exp.data.s, pred.data = sub.data.s,sd.use = NA,
density.distance = FALSE,variance.normalization = FALSE,boot = TRUE)
head(boot.result.rmsd4)
## RMSD low_ci high_ci sd
## Group_39-prediction_file-1 0.7582194 0.6948829 0.8238805 0.03795513
## Group_39-prediction_file-2 0.7236246 0.6616226 0.7896134 0.03916140
## Group_40-prediction_file-1 0.6193394 0.5589046 0.6751160 0.03625268
## Group_41-prediction_file-1 0.5763456 0.5352569 0.6192372 0.02490414
## Group_41-prediction_file-2 0.5763456 0.5352569 0.6192372 0.02490414
## Group_42-prediction_file-1 0.6400337 0.5800956 0.7060908 0.03955404
plot.RMSD(boot.result.rmsd4, method="",boot = TRUE)
# with variance.normalization + without density.distance
boot.result.rmsd2 <- eval.RMSD(real.data = exp.data.s, pred.data = sub.data.s,sd.use = NA,
density.distance = FALSE,variance.normalization = TRUE,boot = TRUE)
head(boot.result.rmsd2)
## RMSD low_ci high_ci sd
## Group_39-prediction_file-1 1.096020 0.9974707 1.201784 0.06331646
## Group_39-prediction_file-2 1.018759 0.9262273 1.111976 0.05627738
## Group_40-prediction_file-1 1.141237 1.0454992 1.243660 0.05919249
## Group_41-prediction_file-1 1.581274 1.4056522 1.775854 0.11120052
## Group_41-prediction_file-2 1.581274 1.4056522 1.775854 0.11120052
## Group_42-prediction_file-1 1.620394 1.4547180 1.774669 0.10079146
plot.RMSD(boot.result.rmsd2, method="",boot = TRUE)
# with variance.normalization + with density.distance
boot.result.rmsd <- eval.RMSD(real.data = exp.data.s, pred.data = sub.data.s,sd.use = NA,
density.distance = TRUE, variance.normalization = TRUE, boot = TRUE)
head(boot.result.rmsd)
## RMSD low_ci high_ci sd
## Group_39-prediction_file-1 1.0730166 0.9615569 1.181177 0.06564373
## Group_39-prediction_file-2 0.9985357 0.9006786 1.089820 0.05708015
## Group_40-prediction_file-1 1.1270789 1.0319753 1.223254 0.05692394
## Group_41-prediction_file-1 1.5582421 1.3888112 1.744497 0.11175008
## Group_41-prediction_file-2 1.5582421 1.3888112 1.744497 0.11175008
## Group_42-prediction_file-1 1.6294310 1.4715430 1.777417 0.09680995
plot.RMSD(boot.result.rmsd, method="",boot = TRUE)
threshold = 0.6: The cutoff for positive/negative value
result.auc.0.6 <- eval.AUC(real.data =exp.data.s, pred.data = sub.data.s,
threshold = 0.6)
head(result.auc.0.6$results)
## AUC.0.6 sensitivity specificity accuracy
## Group_39-prediction_file-1 0.6885 0.7931 0.4844 0.6093
## Group_39-prediction_file-2 0.6865 0.7241 0.5703 0.6326
## Group_40-prediction_file-1 0.6929 0.5057 0.7734 0.6651
## Group_41-prediction_file-1 0.6739 0.9540 0.3438 0.5907
## Group_41-prediction_file-2 0.6739 0.9540 0.3438 0.5907
## Group_42-prediction_file-1 0.5975 0.1839 0.8828 0.6000
## precision f1_score bAccu
## Group_39-prediction_file-1 0.5111 0.6216 0.6387
## Group_39-prediction_file-2 0.5339 0.6146 0.6472
## Group_40-prediction_file-1 0.6027 0.5500 0.6396
## Group_41-prediction_file-1 0.4970 0.6535 0.6489
## Group_41-prediction_file-2 0.4970 0.6535 0.6489
## Group_42-prediction_file-1 0.5161 0.2712 0.5334
plot.AUC(result.auc.0.6)
# for all the submission files
result.bM.spearman <-eval.Correlation.Between(real.data = exp.data.s, pred.data = sub.data.s,
method = "spearman",sd.use = NA,z.transform = TRUE)
plot.Correlation.Between(result.bM.spearman$coefficient, method="spearman")
# for best submission of each group
result.bM.pearson <-eval.Correlation.Between(real.data = exp.data.s, pred.data = sub.data.s,
method = "pearson",sd.use = NA,z.transform = TRUE,grouped = TRUE)
## Warning: package 'bindrcpp' was built under R version 3.4.4
plot.Correlation.Between(result.bM.pearson$coefficient, method="pearson")
result.pCor <- eval.Partial.Correlation(real.data = exp.data.s, pred.data = sub.data.s, method = "spearman")
## Loading required package: ppcor
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 3.4.4
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Warning in pcor(mat, method = "spearman", ...): The inverse of variance-
## covariance matrix is calculated using Moore-Penrose generalized matrix
## invers due to its determinant of zero.
## Warning in sqrt((n - 2 - gp)/(1 - pcor^2)): NaNs produced
plot.Correlation.Between(result.bM.spearman$coefficient, method="Spearman")
total = cbind(real = exp.data.s$value,sub.data.s$value)
Plot.PCA(na.omit(total), labels=F, legend=TRUE)
uniqueness as adj.r^2 difference between total linear model and linear models without certain group use.ci = T : if true error bar as confidence interval; if false, error bar as standard deviation
# resampling row
result.uniq = eval.uniqueness(real.data = exp.data.s, pred.data = sub.data.s,boot = TRUE)
result.uniq
## uniqueness low_ci high_ci
## Group_39-prediction_file-1 0.001239471 -0.003921640 0.01448081
## Group_40-prediction_file-1 0.002164771 -0.003914487 0.01914733
## Group_41-prediction_file-1 0.010086041 -0.003396465 0.03346761
## Group_42-prediction_file-1 0.003068687 -0.003855298 0.02100289
## Group_43-prediction_file-1 0.006215759 -0.003630618 0.02331629
## Group_44-prediction_file-1 0.005379891 -0.003879752 0.02515186
## Group_45-prediction_file-1 0.001870676 -0.003901157 0.01553485
## Group_46-prediction_file-1-late 0.003285608 -0.003858204 0.01936809
## Group_47-prediction_file-1 0.006514064 -0.003738604 0.02772943
## sd
## Group_39-prediction_file-1 0.006207335
## Group_40-prediction_file-1 0.007790426
## Group_41-prediction_file-1 0.011968472
## Group_42-prediction_file-1 0.009212072
## Group_43-prediction_file-1 0.010268351
## Group_44-prediction_file-1 0.010685242
## Group_45-prediction_file-1 0.006824955
## Group_46-prediction_file-1-late 0.008352844
## Group_47-prediction_file-1 0.010923985
plot.uniqueness(result.uniq, method="", boot = TRUE,use.ci = T)
plot.uniqueness(result.uniq, method="", boot = TRUE,use.ci = F)
# generating from experimental distribution
result.bootvar.uniq = eval.uniqueness(real.data = exp.data.s, pred.data = sub.data.s,boot = TRUE,boot.var = T)
## Warning in rnorm(rep.time, real.data$value[x], real.data$sd[x]): NAs
## produced
## Warning in rnorm(rep.time, real.data$value[x], real.data$sd[x]): NAs
## produced
## Warning in rnorm(rep.time, real.data$value[x], real.data$sd[x]): NAs
## produced
## Warning in rnorm(rep.time, real.data$value[x], real.data$sd[x]): NAs
## produced
## Warning in rnorm(rep.time, real.data$value[x], real.data$sd[x]): NAs
## produced
## Warning in rnorm(rep.time, real.data$value[x], real.data$sd[x]): NAs
## produced
result.bootvar.uniq
## uniqueness low_ci high_ci
## Group_39-prediction_file-1 -0.0027158048 -0.0039623391 -0.0005103729
## Group_40-prediction_file-1 -0.0018109820 -0.0037960328 0.0010033611
## Group_41-prediction_file-1 0.0068249980 0.0020283647 0.0123611926
## Group_42-prediction_file-1 0.0003406908 -0.0024782399 0.0047476006
## Group_43-prediction_file-1 0.0038366420 -0.0001059559 0.0080785405
## Group_44-prediction_file-1 0.0019369571 -0.0016295498 0.0059734041
## Group_45-prediction_file-1 -0.0005945561 -0.0029082755 0.0029420296
## Group_46-prediction_file-1-late 0.0025801581 -0.0016677633 0.0078707958
## Group_47-prediction_file-1 0.0041009320 -0.0011966605 0.0116263239
## sd
## Group_39-prediction_file-1 0.001219032
## Group_40-prediction_file-1 0.001545033
## Group_41-prediction_file-1 0.003089869
## Group_42-prediction_file-1 0.002150395
## Group_43-prediction_file-1 0.002562105
## Group_44-prediction_file-1 0.002378842
## Group_45-prediction_file-1 0.001810595
## Group_46-prediction_file-1-late 0.002978781
## Group_47-prediction_file-1 0.003865104
plot.uniqueness(result.bootvar.uniq, method="",boot = TRUE,use.ci = F)